## Cao and Prakash 2010 IO

## Switch to the replication folder when it exists on this machine. On any
## other machine the hard-coded path is absent, so keep the current working
## directory instead of aborting; the data file is then looked up there.
replication_dir <- "/Users/ranjitlall/Documents/Ranjit's work/Harvard/Missing data in CIPE/Drafts/PA/Revisions Jun 2016/Replication materials/Cao and Prakash 2010 IO"
if (dir.exists(replication_dir)) {
    setwd(replication_dir)
} else {
    message("Replication folder not found; using working directory: ", getwd())
}
library(foreign)
library(Amelia)

## Load original dataset (the file name is resolved against the working
## directory)
cp2010 <- read.csv("CP2010 IO Rep Data.csv")
## Preview the first rows and show the dimensions (rows, columns)
head(cp2010)
dim(cp2010)

## Drop ID vars: no columns are dropped in this script; the full data frame
## is handed to amelia() further down

## How many variables? 44: no reduction necessary
dim(cp2010)

## Imputation
## What is average percentage of missing data?
## Count the missing values in each column.
##   x: a data frame or matrix
##   returns: an unnamed integer vector holding one NA count per column of x
NAs <- function(x) {
    ## is.na() gives a logical matrix with the same columns as x; summing each
    ## column counts its NAs without pushing the whole data frame through
    ## apply(). as.integer() drops the column names and keeps the integer type
    ## this function has always returned.
    as.integer(colSums(is.na(x)))
}
## Missing-value count for every column of the data
NAs(cp2010)
## Mean share of missing values across columns, expressed in percent
100 * mean(NAs(cp2010) / nrow(cp2010))

## Thus: 51 imputations

## Note: log_so2pgppp and log_bodpgppp are already lagged
## Fix the random seed so the imputations can be reproduced
## (R reads the literal 02138 as the number 2138)
set.seed(02138)

## Impute 51 data sets, treating the data as a country-year panel
## (cs = "country", ts = "year") with polytime = 3, lags of the four
## interaction terms, and an empirical prior set to 1% of the number of rows
cp2010.out <- amelia(
    cp2010,
    m = 51,
    ts = "year",
    cs = "country",
    polytime = 3,
    lags = c(
        "interaction_3",
        "interaction_4",
        "interaction_5",
        "interaction_6"
    ),
    empri = 0.01 * nrow(cp2010)
)

## Export the imputations in Stata format; separate = FALSE asks for a single
## output file rather than one file per imputation
write.amelia(
    obj = cp2010.out,
    file.stem = "CP2010 IO Imp Data",
    format = "dta",
    separate = FALSE
)
